查看原文
其他

R 语言结构方程模型:尝试用 R 画一个 PNAS 文章的 SEM 图

ecologyR ecologyR 2024-03-25

引言

新鲜出炉的 PNAS 文章,里面的结构方程模型代码是公开的(数据也在文章附件),

Positive associations fuel soil biodiversity and ecological networks worldwide

不少人应该注意到了,Dr. Manuel Delgado-Baquerizo 指导的论文中的结构方程模型的图很有特色:在结构方程模型图中,将不同类别的预测因子归入一个方框,路径系数在连线旁备注,以实现简洁的视觉效果,

  • Grouped the different categories of predictors into the same box in the model for graphical simplicity

又如下面一篇文章的图 2,

Multiple elements of soil biodiversity drive ecosystem functions across biomes

在此,研究了怎么使用 R 实现类似的效果(探索了初步效果,未完全完成),

0. 包

# 加载包
library(tidyverse)
library(tidygraph)
library(ggraph)
library(piecewiseSEM)
library(patchwork)

1. 数据

# 读取附件数据
# data <- read.csv("data/metanew.csv", row.names = 1)
# data[, 5:44] <- scale(data[, 5:44]) # 若仅仅使用 scale 函数标准化后,可能会使 piecewiseSEM 报错
# data <- as.data.frame(data)

# 或者写成
data <- read.csv("data/metanew.csv", row.names = 1) |>
mutate_if(
is.numeric,
~ as.numeric(scale(.x)) # 为了 piecewiseSEM 此处 scale 需要搭配 as.numeric 一起使用
)

2. 结构方程模型

# 对 cycfac motif 的 SEM 拟合
sem.model1 <- psem(
lm(
biodiversity ~ fragility + cycfac_total + modularity + # 生物
Distance_from_equator + Elevation + # 空间
TSEA + MAP + # 气候
pH + Texture + Soil_C, # 土壤
data = data
),
lm(
fragility ~ cycfac_total + modularity + # 生物
Distance_from_equator + Elevation + # 空间
TSEA + MAP + # 气候
pH + Texture + Soil_C, # 土壤
data = data
),
lm(
modularity ~ cycfac_total + # 生物
Distance_from_equator + Elevation + # 空间
TSEA + MAP + # 气候
pH + Texture + Soil_C, # 土壤
data = data
),
lm(
cycfac_total ~ Distance_from_equator + Elevation + # 空间
TSEA + MAP + # 气候
pH + Texture + Soil_C, # 土壤
data = data
),
lm(
pH ~ Distance_from_equator + Elevation + # 空间
TSEA + MAP, # 气候
data = data
),
lm(
Soil_C ~ Distance_from_equator + Elevation + # 空间
TSEA + MAP + # 气候
pH + Texture, # 土壤
data = data
),
lm(
Texture ~ Distance_from_equator + Elevation + # 空间
TSEA + MAP, # 气候
data = data
),
lm(
TSEA ~ Distance_from_equator + Elevation, # 空间
data = data
),
lm(
MAP ~ Distance_from_equator + Elevation + # 空间
TSEA, # 气候
data = data
)
)

3. 查看模型结果

拟合的不错,

# 查看 SEM 结果
# dSep 的 P 值都不显著,则拟合得不错
dSep(sem.model1, .progressBar = FALSE) |> print()
## Independ.Claim Test.Type DF Crit.Value P.Value
## 1 Texture ~ pH + ... coef 585 -1.505681 0.1326888
# fisherC 的 P 值大于 0.05,则拟合得不错
fisherC(sem.model1) |> print()
## Fisher.C df P.Value
## 1 4.039 2 0.133
# 随机抽取 10 行结果看看(整个结果表格非常长)
coefs(sem.model1) |>
select(1:7) |>
slice_sample(n = 10) |>
print()
## Response Predictor Estimate Std.Error DF Crit.Value P.Value
## 1 pH Distance_from_equator 0.0171 0.0406 586 0.4204 0.6744
## 2 cycfac_total Distance_from_equator -0.0617 0.0389 583 -1.5870 0.1130
## 3 TSEA Elevation -0.2323 0.0439 588 -5.2962 0.0000
## 4 modularity TSEA -0.2543 0.0462 582 -5.5024 0.0000
## 5 fragility TSEA -0.1916 0.0396 581 -4.8361 0.0000
## 6 cycfac_total Elevation -0.0933 0.0445 583 -2.0961 0.0365
## 7 fragility Distance_from_equator 0.0096 0.0332 581 0.2885 0.7731
## 8 fragility pH -0.0734 0.0339 581 -2.1633 0.0309
## 9 modularity Distance_from_equator 0.3103 0.0375 582 8.2663 0.0000
## 10 Soil_C TSEA 0.2732 0.0424 584 6.4479 0.0000

4. 绘图

# 获取 SEM 数据
dat0 <- coefs(sem.model1) |>
select(Predictor, Response, Estimate, P.Value) |>
mutate(Estimate = ifelse(Estimate == "-", NA, Estimate)) |>
filter(Predictor != "status") |>
setNames(nm = c("from0", "to0", "weight", "p"))

# 查看有哪些变量
unique(c(dat0$from0, dat0$to0))
## [1] "fragility" "cycfac_total" "modularity"
## [4] "Distance_from_equator" "Elevation" "TSEA"
## [7] "MAP" "pH" "Texture"
## [10] "Soil_C" "biodiversity"
# 数据操作
# 这是为了下面的 as_tbl_graph 函数:它依据 from、to 列名确定网络的箭头方向
dat <- dat0 |>
mutate(
from1 = case_when(
from0 %in% c("Distance_from_equator", "Elevation", "TSEA", "MAP","Soil_C", "pH", "Texture") ~ "ENV",
TRUE ~ from0
),
to1 = case_when(
to0 %in% c("Distance_from_equator", "Elevation", "TSEA", "MAP","Soil_C", "pH", "Texture") ~ "ENV",
TRUE ~ to0
),
from2 = case_when(
from0 %in% c("TSEA", "MAP") ~ "Climate",
from0 %in% c("Distance_from_equator", "Elevation") ~ "Space",
TRUE ~ "Soil"
),
to2 = case_when(
to0 %in% c("TSEA", "MAP") ~ "Climate",
to0 %in% c("Distance_from_equator", "Elevation") ~ "Space",
TRUE ~ "Soil"
)
)

5. 绘图

5.1 先设置一个绘图主题,

# 设置一个全局可用的主题

font <- "sans"

theme_set(
theme_void(
base_family = font,
base_size = 12
) +
theme(
plot.background = element_rect(color = NA, fill = "white")
)
)

5.2 绘图 1:完整模型

注意到,ENV 到 modularity 和 biodiversity 等指标,有多条路径,

# 图 1
graph1 <- dat |>
relocate(from = from1, to = to1) |>
as_tbl_graph(directed = TRUE)

# 绘图 1
figure1 <- graph1 |>
ggraph(layout = "circle") +
geom_edge_parallel(
aes(
color = ifelse(weight > 0, "+", "-"), # 指定正、负相关性颜色
width = abs(weight), # 宽度正比于路径系数的绝对值
alpha = ifelse(p < 0.05, "P < 0.05", "P > 0.05"), # 不显示不显著路径,使得透明度为 0
start_cap = label_rect(node1.name, padding = margin(10, 10, 10, 10)), # 箭头起始端部离节点的间距
end_cap = label_rect(node2.name, padding = margin(10, 10, 10, 10)) # 箭头结束端离节点的间距
),
arrow = arrow(angle = 60, length = unit(0.2, "line")), # 添加箭头
sep = unit(0.4, "line") # 平形线的间距
) +
# geom_edge_loop( # 存在循环(可以不使用 loop 这段代码)
# aes(
# strength = 0.9,
# direction = -135
# ),
# color = "grey",
# start_cap = circle(0),
# end_cap = circle(0.6),
# arrow = arrow(angle = 60, length = unit(0.4, "line"))
# ) +
geom_node_label( # 添加节点(变量)文字标签
aes(label = name),
size = 4,
label.r = unit(0.5, "line"),
family = font
) +
scale_edge_color_manual(name = NULL, values = c("+" = "lightblue2", "-" = "grey0")) +
scale_edge_width_continuous(name = NULL, limits = c(0, 1), range = c(0.2, 2.4)) +
scale_edge_alpha_manual(guide = "none", values = c(1, 0)) +
facet_grid(. ~ "SEM") +
coord_equal(
xlim = c(-1.1, 1.1),
clip = "off"
) +
# theme_minimal() + # 去掉注释,则方便查看图的坐标
theme(
strip.background = element_rect(),
strip.text = element_text(margin = margin(5, 5, 5, 5)),
legend.position = "none"
)
figure1

5.3 绘图 2:子模型

土壤子模型,

# 图 2
graph2 <- dat |>
filter(from1 == "ENV", to1 == "ENV") |>
relocate(from = from2, to = to2, weight, p) |>
as_tbl_graph(directed = TRUE)

# 绘图 2
# 代码注释见上一段
figure2 <- graph2 |>
ggraph(layout = "stress") +
geom_edge_parallel(
aes(
color = ifelse(weight > 0, "+", "-"),
width = abs(weight),
alpha = ifelse(p < 0.05, "P < 0.05", "P > 0.05"),
start_cap = label_rect(node1.name, padding = margin(10, 10, 10, 10)),
end_cap = label_rect(node2.name, padding = margin(10, 10, 10, 10))
),
label_push = unit(0.5, "line"),
angle_calc = "along",
arrow = arrow(angle = 60, length = unit(0.2, "line")),
sep = unit(0.4, "line")
) +
# geom_edge_loop(
# aes(
# strength = 0.9,
# direction = c(rep(90, 10), rep(-90, 9))
# ),
# color = "grey",
# start_cap = circle(0),
# end_cap = circle(0.6),
# arrow = arrow(angle = 60, length = unit(0.4, "line"))
# ) +
# geom_node_point(size = 8) +
geom_node_label(
aes(label = name),
size = 4,
label.r = unit(0.5, "line"),
family = font,
# vjust = 0
) +
scale_edge_color_manual(name = NULL, values = c("+" = "lightblue2", "-" = "grey0")) +
scale_edge_width_continuous(name = NULL, limits = c(0, 1), range = c(0.2, 2.4)) +
scale_edge_alpha_manual(guide = "none", values = c(1, 0)) +
facet_grid(. ~ "Soil sub-model") +
coord_equal(
clip = "off"
) +
# theme_minimal() +
theme(
strip.background = element_rect(),
strip.text = element_text(margin = margin(5, 5, 5, 5)),
legend.position = "none"
)
figure2

5.4 其他代码

标记路径系数有点麻烦了,连线太多!可以考虑合并路径,然后在旁边(手动)列出系数——正如论文中那样,

但此时,需要面对一个问题,即:路径宽度(width)怎么确定?是所有子路径系数的和,还是子路径系数绝对值的和?

  • 打赏并私信发来您的邮箱,可获取剩余完整代码(通过邮件发送)

figure3

结语

还可以继续修改。如果想不离开 R 语言直接调整布局,可以参考之前的另一篇公众号文章(独家 R 语言小工具:结构方程模型从拟合到作图,完全使用 R 解决!但遗憾的是,此工具没有针对这种节点之间多条连接的情景做优化),

# 保存为 .csv 表格,即可以用 Shiny APP 小工具调整布局
dat |>
relocate(from = from1, to = to1) |>
write_csv("graph1-sem-table.csv")


继续滑动看下一个
向上滑动看下一个

您可能也对以下帖子感兴趣

文章有问题?点此查看未经处理的缓存